rm(list=ls())
knitr::opts_knit$set(root.dir="/mnt/vmfileshare/ClimateData/")
library(ggplot2)
library(terra)
library(tmap) #pretty maps
library(RColorBrewer)
library(tidyverse)
library(kableExtra)
This is an example notebook for the assessment of bias corrected data, using output from the R ‘qmap’ package for the city of Glasgow and the variable ‘tasmax’.
Input data
This script requires the following data:
The data is required in raster format and dataframe formats
Calibration vs Validation dates
The calibration period runs between 01-12-1980 to the day prior to 01-12-2010 The validation period runs between 01-12-2010 to the day prior to 01-12-2020
An visual comparison of trends across observation, raw and adjusted data for the same time period
Random selection of 3 days of the observation, calibration and two adjusted cals, for three historic days
Adding in the city shapeoutline for prettier maps
shape <-sf::st_as_sf(vect(paste0(dd, "shapefiles/three.cities/", city, "/", city, ".shp")))
tm_shape(obs.cal.rasts[[1]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`05`[[1]]) +
tm_raster(title="CPM, Raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`05`[[1]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[1]]) + tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`06`[[1]]) +
tm_raster(title="CPM, Raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`06`[[1]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[1]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`07`[[1]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`07`[[1]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[1]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`08`[[1]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`08`[[1]]) + tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[7081]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`05`[[7081]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`05`[[7081]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[7081]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`06`[[7081]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`06`[[7081]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[7081]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`07`[[7081]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`07`[[7081]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.cal.rasts[[7081]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.raw.rasts.L$`08`[[7081]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.cal.adj.rasts.L$`08`[[7081]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.val.rasts[[1590]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.raw.rasts.L$`05`[[1590]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.adj.rasts.L$`05`[[1590]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.val.rasts[[1590]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.raw.rasts.L$`06`[[1590]]) +
tm_raster(title="CPM, Raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.adj.rasts.L$`06`[[1590]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.val.rasts[[1590]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.raw.rasts.L$`07`[[1590]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.adj.rasts.L$`07`[[1590]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(obs.val.rasts[[1590]]) +
tm_raster(title="Observation") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.raw.rasts.L$`08`[[1590]]) +
tm_raster(title="CPM, raw") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
tm_shape(cpm.val.adj.rasts.L$`08`[[1590]]) +
tm_raster(title="CPM, bias-corrected") +
tm_layout(legend.outside = T) +
tm_shape(shape) + tm_borders(col="black")
#Lists of dfs to summarise the means of
dfL <- c(list(obs.cal.df), cpm.cal.raw.df.L, cpm.cal.adj.df.L)
names(dfL) <- c("obs.cal", paste0("cpm.cal.raw.", names(cpm.cal.raw.df.L)),
paste0("cpm.cal.adj.", names(cpm.cal.raw.df.L)))
#Returns a list of dfs in handy format for graphing
dfg.daily.means <- lapply(dfL, function(i){
x <- 1:ncol(i) #ignore cols 1 & 2 with x y
#Calc mean and sd
dfx <- lapply(x, function(x){
y <- i[,x]
mean <- mean(y, na.rm=T)
sd <- sd(y, na.rm=T)
dfr <- data.frame(mean=mean,
sd.high=mean+sd,
sd.low=mean-sd)
dfr$day <- names(i)[x]
return(dfr)
})
dfx_g <- dfx %>% purrr::reduce(rbind)
})
names(dfg.daily.means) <- names(dfL)
Note : Can add a plot here for daily averages but it’s quite visually confusing so omitting for now
#Annotate season based on month index - the dates have different formats depending on the input data (ie hads vs cpm) so am pulling out the necessary to adjust sep
obs.cal.season.mean <- dfg.daily.means$obs.cal
x <- dfg.daily.means$obs.cal$day
obs.cal.season.mean$season <- ifelse(grepl("1231_|0131_|0228_|0229_", x),
"Winter",
ifelse(grepl("0331_|0430_|0531_", x), "Spring",
ifelse(grepl("0630_|0731_|0831_", x), "Summer", "Autumn")))
#Note = the n days per season is not quite evenly split between the 4 seasons because of how the hads resamples across the year for 360 days
#Create season_year - All Winter months apart from Dec to be added to the previous year (ie Winter 2000) would be the Dec of 2000 to the Feb of 2001
rem <- nchar(var) + 39
year <- substr(x, rem, rem+3)
year <- as.numeric(substr(year, 1,4))
obs.cal.season.mean$season_year <- ifelse(grepl("0131_|0228_|0229_", x),
paste0(year-1, obs.cal.season.mean$season),
paste0(year, obs.cal.season.mean$season))
# Mutate to a seasonal mean df
obs.cal.season.mean <- aggregate(obs.cal.season.mean[[1]], list(obs.cal.season.mean[["season_year"]]), function(x) c(seasonal.mean = mean(x), sd.high.seasonal = mean(x) + sd(x), sd.low.seasonal = mean(x) - sd(x)))
obs.cal.season.mean<- data.frame(season_year=obs.cal.season.mean$Group.1,
seasonal.mean=obs.cal.season.mean$x[,"seasonal.mean"],
sd.high.seasonal = obs.cal.season.mean$x[,"sd.high.seasonal"],
sd.low.seasonal = obs.cal.season.mean$x[,"sd.low.seasonal"])
#Grouping variable for later vars
obs.cal.season.mean$model <- "obs"
For tasmax - grouping to season and calculating the seasonal maxima vals (i.e. rather than means above)
(To be added)
(To be added)
Using the validation data set for this
val.dfs <- c(list(obs.val.df), cpm.val.raw.df.L, cpm.val.adj.df.L)
#Convert dfs to a vector
val.dfs.v <- lapply(val.dfs, function(d){
#Convert to single vector
unlist(as.vector(d))})
val.dfs.v.df <- as.data.frame(val.dfs.v)
names(val.dfs.v.df) <- c("obs.val", paste0("Run", rep(runs, 2), "_", var, ".",rep(c("raw", "adj", 4)))) # Names for easy reference
descriptives <- apply(val.dfs.v.df, 2, function(x){
per <- data.frame(as.list(quantile(x, probs=c(0.1, 0.9))))
data.frame(mean=mean(x), sd=sd(x), min = min(x), per10th=per$X10.,per90th=per$X90., max = max(x))
})
descriptives <- descriptives %>% reduce(rbind)
row.names(descriptives) <- names(val.dfs.v.df)
d <- t(descriptives)
d %>%
kable(booktabs = T) %>%
kable_styling() %>%
row_spec(grep(".bc.",row.names(d)), background = "lightgrey")
| obs.val | Run05_tasmax.raw | Run06_tasmax.adj | Run07_tasmax.4 | Run08_tasmax.raw | Run05_tasmax.adj | Run06_tasmax.4 | Run07_tasmax.raw | Run08_tasmax.adj | |
|---|---|---|---|---|---|---|---|---|---|
| mean | 12.990120 | 12.460644 | 12.047759 | 12.233846 | 12.764617 | 13.551728 | 13.128147 | 13.4808839 | 13.199155 |
| sd | 5.375065 | 5.987282 | 6.000949 | 5.587015 | 5.852800 | 5.557214 | 5.692209 | 5.3415785 | 5.598392 |
| min | -5.222516 | -3.289649 | -2.942725 | -1.848730 | -2.183447 | -2.261268 | -1.499793 | -0.1865713 | -4.036536 |
| per10th | 6.005837 | 4.394897 | 4.245508 | 5.023584 | 5.110010 | 6.493001 | 5.988253 | 6.6283767 | 5.990010 |
| per90th | 19.912904 | 20.198414 | 19.897852 | 19.655664 | 20.234034 | 20.832330 | 20.699387 | 20.5333003 | 20.171180 |
| max | 31.605967 | 32.398094 | 31.079248 | 32.790184 | 35.011864 | 34.132040 | 31.410919 | 33.5803528 | 34.205112 |
Note - need to add back in some facetting to this fig
m <- reshape2::melt(val.dfs.v.df)
ggplot(m, aes(value, fill=variable, colour=variable)) +
geom_density(alpha = 0.3, position="identity") +
theme_minimal() +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1")
Using the following to assess overall fit:
actual <- val.dfs.v.df$obs.val
rsq <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
cor(actual, x)^2
})
rmse <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
sqrt(mean((actual - x)^2))
})
pbias <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
hydroGOF::pbias(x, actual)
})
nse <- sapply(val.dfs.v.df[c(2:ncol(val.dfs.v.df))], function(x){
hydroGOF::NSE(x, actual)
})
Highlighting the bias corrected statistics
k <- cbind(rsq, rmse, pbias, nse)
k %>%
kable(booktabs = T) %>%
kable_styling() %>%
row_spec(grep(".bc.",row.names(k)), background = "lightgrey")
| rsq | rmse | pbias | nse | |
|---|---|---|---|---|
| Run05_tasmax.raw | 0.5555510 | 4.128585 | -4.1 | 0.4100210 |
| Run06_tasmax.adj | 0.5535077 | 4.218502 | -7.3 | 0.3840428 |
| Run07_tasmax.4 | 0.5051351 | 4.241554 | -5.8 | 0.3772924 |
| Run08_tasmax.raw | 0.5331874 | 4.153865 | -1.7 | 0.4027737 |
| Run05_tasmax.adj | 0.5464456 | 3.990959 | 4.3 | 0.4486993 |
| Run06_tasmax.4 | 0.5580189 | 3.949778 | 1.1 | 0.4600178 |
| Run07_tasmax.raw | 0.5082040 | 4.090093 | 3.8 | 0.4209707 |
| Run08_tasmax.adj | 0.5298693 | 4.058097 | 1.6 | 0.4299947 |
(Not considered consecutively here)
(to be added)
The number of quantiles selected will effect the efficacy of the bias correction: lots of options therefore with this specific method